from gensim import corpora, models
import pickle
import random
random.seed(1)
import numpy as np
np.random.seed(1)
# Folder that receives the pickled topic models produced by this script.
result_folder = '../data/table_1_results/'

# ---------------------------------------------------------------------------
# Congress: fit an LDA topic model on the tweets of accounts that have an
# entry in the party-affiliation dictionary.
# ---------------------------------------------------------------------------
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only point these paths at trusted, locally produced data.
# The `with` blocks close every file handle deterministically instead of
# leaving it to the garbage collector.
with open('congress_party_affiliation', 'rb') as pickle_file:
    congress_party_affiliation_dict = pickle.load(pickle_file)
with open('./congress_tweet_dict_4th_no_timelimit_lengthlimit', 'rb') as pickle_file:
    congress_tweet_dict = pickle.load(pickle_file)

totalArticle_all = []
totalArticle_rep = []
totalArticle_dem = []
for each_people_origin, twitter_list in congress_tweet_dict.items():
    # The affiliation dictionary is queried with the lower-cased key.
    each_people = each_people_origin.lower()
    if each_people in congress_party_affiliation_dict:
        party = congress_party_affiliation_dict[each_people]
        if party in ['rep', 'r']:
            totalArticle_rep += twitter_list
        elif party in ['dem', 'd']:
            totalArticle_dem += twitter_list
        # Any account with an affiliation entry feeds the combined corpus,
        # even when its label is neither Republican nor Democrat; accounts
        # without an entry are skipped entirely.
        totalArticle_all += twitter_list

# Only the combined corpus is used for the model below; the per-party lists
# are collected but not consumed in this section.
# Whitespace tokenisation -> vocabulary -> bag-of-words vectors.
totalArticle_token = [eacharticle.split() for eacharticle in totalArticle_all]
dictionary = corpora.Dictionary(totalArticle_token)
corpus = [dictionary.doc2bow(article) for article in totalArticle_token]

# random_state is fixed so repeated runs start from the same initial state.
LDA_model = models.ldamulticore.LdaMulticore(corpus, num_topics=26, id2word=dictionary, passes=45, random_state=1)

# Closing the output handle explicitly guarantees the pickle is flushed to disk.
with open(result_folder + 'congress_only_topic_model_4th_full_no_timelimit_all', 'wb') as pickle_file:
    pickle.dump(LDA_model, pickle_file)

# Print the 15 highest-weighted words of each of the 26 topics.
all_topics = LDA_model.print_topics(num_topics=26, num_words=15)
print('congress word list')
for each_topic in all_topics:
    print(each_topic)
print('===========================')



# ---------------------------------------------------------------------------
# Mayors: fit an LDA topic model on every tweet in the mayor tweet dictionary.
# ---------------------------------------------------------------------------
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only point these paths at trusted, locally produced data.
# The `with` blocks close every file handle deterministically instead of
# leaving it to the garbage collector.
with open('./mayor_party_affiliation_dict', 'rb') as pickle_file:
    mayor_party_affiliation_dict = pickle.load(pickle_file)
with open('./mayor_tweet_dict_4th_no_timelimit_lengthlimit', 'rb') as pickle_file:
    mayor_tweet_dict = pickle.load(pickle_file)

totalArticle_rep = []
totalArticle_dem = []
totalArticle_all = []
for each_people, twitter_list in mayor_tweet_dict.items():
    # Direct indexing: every account in the tweet dictionary is expected to
    # have an affiliation entry (a plain dict raises KeyError otherwise).
    party = mayor_party_affiliation_dict[each_people]
    if party == 'rep':
        totalArticle_rep += twitter_list
    elif party == 'dem':
        totalArticle_dem += twitter_list
    # Every account feeds the combined corpus regardless of its party label.
    totalArticle_all += twitter_list

# Only the combined corpus is used for the model below; the per-party lists
# are collected but not consumed in this section.
# Whitespace tokenisation -> vocabulary -> bag-of-words vectors.
totalArticle_token = [eacharticle.split() for eacharticle in totalArticle_all]
dictionary = corpora.Dictionary(totalArticle_token)
corpus = [dictionary.doc2bow(article) for article in totalArticle_token]

# random_state is fixed so repeated runs start from the same initial state.
LDA_model = models.ldamulticore.LdaMulticore(corpus, num_topics=26, id2word=dictionary, passes=45, random_state=1)

# Closing the output handle explicitly guarantees the pickle is flushed to disk.
with open(result_folder + 'mayor_only_topic_model_4th_full_no_timelimit_all', 'wb') as pickle_file:
    pickle.dump(LDA_model, pickle_file)

# Print the 15 highest-weighted words of each of the 26 topics.
all_topics = LDA_model.print_topics(num_topics=26, num_words=15)
print('mayor word list')
for each_topic in all_topics:
    print(each_topic)
print('===========================')




# ---------------------------------------------------------------------------
# Governors: fit an LDA topic model on every tweet in the governor tweet
# dictionary.
# ---------------------------------------------------------------------------
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only point these paths at trusted, locally produced data.
# The `with` blocks close every file handle deterministically instead of
# leaving it to the garbage collector.
with open('./governor_party_affiliation_dict', 'rb') as pickle_file:
    governor_party_affiliation_dict = pickle.load(pickle_file)
with open('./governor_tweet_dict_4th_no_timelimit_lengthlimit', 'rb') as pickle_file:
    governor_tweet_dict = pickle.load(pickle_file)

totalArticle_rep = []
totalArticle_dem = []
totalArticle_all = []
for each_people, twitter_list in governor_tweet_dict.items():
    # Direct indexing: every account in the tweet dictionary is expected to
    # have an affiliation entry (a plain dict raises KeyError otherwise).
    # This section compares against the single-letter labels 'r' / 'd'.
    party = governor_party_affiliation_dict[each_people]
    if party == 'r':
        totalArticle_rep += twitter_list
    elif party == 'd':
        totalArticle_dem += twitter_list
    # Every account feeds the combined corpus regardless of its party label.
    totalArticle_all += twitter_list

# Only the combined corpus is used for the model below; the per-party lists
# are collected but not consumed in this section.
# Whitespace tokenisation -> vocabulary -> bag-of-words vectors.
totalArticle_token = [eacharticle.split() for eacharticle in totalArticle_all]
dictionary = corpora.Dictionary(totalArticle_token)
corpus = [dictionary.doc2bow(article) for article in totalArticle_token]

# random_state is fixed so repeated runs start from the same initial state.
LDA_model = models.ldamulticore.LdaMulticore(corpus, num_topics=26, id2word=dictionary, passes=45, random_state=1)

# Closing the output handle explicitly guarantees the pickle is flushed to disk.
with open(result_folder + 'governor_only_topic_model_4th_full_no_timelimit_all', 'wb') as pickle_file:
    pickle.dump(LDA_model, pickle_file)

# Print the 15 highest-weighted words of each of the 26 topics.
all_topics = LDA_model.print_topics(num_topics=26, num_words=15)
print('governor word list')
for each_topic in all_topics:
    print(each_topic)
print('===========================')
